In [1]:
import os
import time
import numpy as np
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_context('notebook', font_scale=1.5, rc={'lines.markeredgewidth': 2})
In [2]:
from sklearn.cluster import SpectralClustering
In [3]:
import visual_behavior_glm.GLM_analysis_tools as gat
In [4]:
import visual_behavior.visualization.utils as utils
import visual_behavior.data_access.loading as loading
import visual_behavior.data_access.utilities as utilities
In [5]:
from visual_behavior.dimensionality_reduction.clustering import processing
from visual_behavior.dimensionality_reduction.clustering import plotting
In [6]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

get experiments and cells tables and limit to closest familiar and novel active

In [7]:
experiments_table = loading.get_platform_paper_experiment_table()
len(experiments_table)
Out[7]:
1249
In [8]:
# limit to closest familiar and novel active
experiments_table = utilities.limit_to_last_familiar_second_novel_active(experiments_table)
experiments_table = utilities.limit_to_containers_with_all_experience_levels(experiments_table)
len(experiments_table)
Out[8]:
402
In [9]:
matched_experiments = experiments_table.index.values
In [10]:
cells_table = loading.get_cell_table()
len(cells_table.cell_specimen_id.unique())
Out[10]:
28833
In [12]:
cells_table = loading.get_matched_cells_table(cells_table)
matched_cells = cells_table.cell_specimen_id.unique()
[autoreload of visual_behavior.dimensionality_reduction.clustering.plotting failed: Traceback (most recent call last):
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\extensions\autoreload.py", line 434, in superreload
    module = reload(module)
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\imp.py", line 314, in reload
    return importlib.reload(module)
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\plotting.py", line 26, in <module>
    from visual_behavior.dimensionality_reduction.clustering.processing import get_silhouette_scores, get_cluster_density, get_cre_lines, get_cell_type_for_cre_line
ImportError: cannot import name 'get_cre_lines' from 'visual_behavior.dimensionality_reduction.clustering.processing' (c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py)
]
3921 cells in matched cells table
In [13]:
# get cre_lines and cell types for plot labels
cre_lines = np.sort(cells_table.cre_line.unique())
cell_types = utilities.get_cell_types_dict(cre_lines, experiments_table)

get GLM output, filter and reshape

In [14]:
glm_version = '24_events_all_L2_optimize_by_session'
model_output_type = 'adj_fraction_change_from_full'
In [15]:
base_dir = r'\\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4'
base_dir = os.path.join(base_dir, glm_version)
if not os.path.exists(base_dir):
    os.mkdir(base_dir)

NOTE: create a new folder if you are testing out this notebook or else it will overwrite existing files & plots

In [16]:
folder = '220308'
save_dir = os.path.join(base_dir, folder)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
In [17]:
features = processing.get_features_for_clustering()
In [18]:
# get GLM results from saved file in save_dir or from mongo if file doesnt exist
results_pivoted = processing.get_glm_results_pivoted_for_clustering(glm_version, model_output_type, save_dir)
results_pivoted.head()
loading results_pivoted from \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4\24_events_all_L2_optimize_by_session\220308\24_events_all_L2_optimize_by_session_results_pivoted.h5
Out[18]:
identifier Full all-images all-omissions behavioral hits image0 image1 image2 image3 ... ophys_container_id project_code imaging_depth targeted_structure date_of_acquisition session_type experience_level passive image_set file_id
141 1003771765_1086489847 0.0 0.000000 0.000000 0.000000 0.0 0.000000 0.0 0.0 0.0 ... 1000740410 VisualBehaviorTask1B 175 VISp 2020-01-29 16:25:58.000000 OPHYS_3_images_B Familiar False B 1003809811
142 1003771765_1086489860 0.0 0.000000 0.000000 0.000000 0.0 0.000000 0.0 0.0 0.0 ... 1000740410 VisualBehaviorTask1B 175 VISp 2020-01-29 16:25:58.000000 OPHYS_3_images_B Familiar False B 1003809811
143 1003771765_1086489891 0.0 0.000000 -0.524089 -0.376901 0.0 0.000000 0.0 0.0 0.0 ... 1000740410 VisualBehaviorTask1B 175 VISp 2020-01-29 16:25:58.000000 OPHYS_3_images_B Familiar False B 1003809811
180 1004403369_1086489847 0.0 -0.310928 0.000000 -0.454319 0.0 -0.158284 0.0 0.0 0.0 ... 1000740410 VisualBehaviorTask1B 175 VISp 2020-01-31 16:32:20.000000 OPHYS_4_images_A Novel 1 False A 1004465164
181 1004403369_1086489860 0.0 -0.910349 -0.109417 0.000000 0.0 0.000000 0.0 0.0 0.0 ... 1000740410 VisualBehaviorTask1B 175 VISp 2020-01-31 16:32:20.000000 OPHYS_4_images_A Novel 1 False A 1004465164

5 rows × 72 columns

In [19]:
# get dropout scores just for the features we want to cluster on
dropouts = processing.limit_results_pivoted_to_features_for_clustering(results_pivoted)
dropouts.head()
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:291: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results[feature] = np.abs(results[feature])
Out[19]:
all-images omissions behavioral task cell_specimen_id experience_level
0 0.034836 0.336736 0.013690 0.492372 1086528400 Familiar
1 0.091568 0.000000 0.086435 0.472242 1086525033 Familiar
2 0.000000 0.000000 0.000000 0.000000 1086524899 Familiar
3 0.935539 0.036787 0.000000 0.063940 1086524622 Familiar
4 0.290107 0.000000 0.464025 0.000000 1086524448 Familiar
In [20]:
# unstack dropout scores to get a vector of features x experience levels for each cell
feature_matrix = processing.get_feature_matrix_for_clustering(dropouts, glm_version, save_dir=save_dir)
feature_matrix.head()
No duplicated cells found
3921
Out[20]:
all-images omissions behavioral task
experience_level Familiar Novel 1 Novel >1 Familiar Novel 1 Novel >1 Familiar Novel 1 Novel >1 Familiar Novel 1 Novel >1
cell_specimen_id
1086489847 0.000000 0.310928 0.017832 0.000000 0.000000 0.0 0.000000 0.454319 0.971563 0.000000 0.009904 0.009827
1086489860 0.000000 0.910349 0.000000 0.000000 0.109417 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1086489891 0.000000 0.677381 0.000000 0.524089 0.000000 0.0 0.376901 0.020819 1.000000 0.000000 0.025777 0.000000
1086489976 0.817370 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1086490002 0.852927 0.000000 1.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.021871 0.000000 0.000000
In [21]:
# get cell metadata for all cells in feature_matrix
cell_metadata = processing.get_cell_metadata_for_feature_matrix(feature_matrix, cells_table)
cell_metadata.head()
3921 cells in cell_metadata for feature_matrix
Out[21]:
ophys_experiment_id equipment_name full_genotype mouse_id reporter_line driver_line sex age_in_days cre_line indicator ... area_binned_depth layer date first_novel n_relative_to_first_novel last_familiar last_familiar_active second_novel second_novel_active experience_exposure
cell_specimen_id
1086551315 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086550804 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086541251 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086540341 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086539950 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3

5 rows × 40 columns

plot feature matrix for each cre line

In [21]:
plotting.plot_feature_matrix_for_cre_lines(feature_matrix, cell_metadata, save_dir=base_dir, folder=folder)

get Silhouette scores

In [22]:
silhouette_scores = processing.load_silhouette_scores(glm_version, feature_matrix, cell_metadata, save_dir)
loading silhouette scores from \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4\24_events_all_L2_optimize_by_session\220308\24_events_all_L2_optimize_by_session_silhouette_scores.pkl
done.

select number of clusters

In [23]:
n_clusters_cre = {'Slc17a7-IRES2-Cre': 10,
                 'Sst-IRES-Cre': 6, 
                 'Vip-IRES-Cre':12}

plot silhouettes with selected # clusters

In [24]:
plotting.plot_silhouette_scores_n_clusters(silhouette_scores, cell_metadata, n_clusters_cre=n_clusters_cre, save_dir=base_dir, folder=folder)

get coclustering matrices per cre line

In [24]:
coclustering_matrices = processing.get_coclustering_matrix(glm_version, feature_matrix, cell_metadata, n_clusters_cre, save_dir, nboot=100)
loading file...
done.

get cluster labels per cre line from Agglomerative clustering on co-clustering matrix

In [25]:
cluster_labels = processing.get_cluster_labels(coclustering_matrices, cell_metadata, n_clusters_cre, save_dir, load=False)
cluster_labels.head()
generating cluster labels from coclustering matrix
saving cluster_labels to \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4\24_events_all_L2_optimize_by_session\220308\cluster_labels_Vip_12_Sst_6_Slc17a7_10.h5
Out[25]:
labels cell_specimen_id cre_line cluster_id
0 5 1086492406 Vip-IRES-Cre 5
1 5 1086492307 Vip-IRES-Cre 5
2 4 1086492221 Vip-IRES-Cre 7
3 5 1086492174 Vip-IRES-Cre 5
4 3 1086491936 Vip-IRES-Cre 1

merge cluster labels with cell metadata, remove small clusters, and add manual sort order

In [26]:
cluster_meta = processing.get_cluster_meta(cluster_labels, cell_metadata, feature_matrix, n_clusters_cre, save_dir, load=False)
cluster_meta.head()
generating cluster_meta
dropping 4 cells for ('Slc17a7-IRES2-Cre', 10)
dropping 1 cells for ('Sst-IRES-Cre', 6)
5 cells dropped total
adding within cluster correlation to cluster_meta
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:2559: RuntimeWarning: invalid value encountered in true_divide
  c /= stddev[:, None]
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:2560: RuntimeWarning: invalid value encountered in true_divide
  c /= stddev[None, :]
saving cluster_meta to \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4\24_events_all_L2_optimize_by_session\220308\cluster_metadata_Vip_12_Sst_6_Slc17a7_10.h5
C:\Users\marinag\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py:2530: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block3_values] [items->['equipment_name', 'full_genotype', 'reporter_line', 'driver_line', 'sex', 'cre_line', 'indicator', 'project_code', 'targeted_structure', 'date_of_acquisition', 'session_type', 'experience_level', 'image_set', 'cell_type', 'area_depth', 'area_binned_depth', 'layer', 'experience_exposure', 'manual_sort_order']]

  pytables.to_hdf(path_or_buf, key, self, **kwargs)
Out[26]:
cluster_id labels ophys_experiment_id equipment_name full_genotype mouse_id reporter_line driver_line sex age_in_days ... first_novel n_relative_to_first_novel last_familiar last_familiar_active second_novel second_novel_active experience_exposure original_cluster_id manual_sort_order within_cluster_correlation
cell_specimen_id
1086492406 5 5 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 5 7 0.711864
1086492307 5 5 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 5 7 0.948394
1086492221 7 4 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 7 3 0.857523
1086492174 5 5 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 5 7 0.755901
1086491936 1 3 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 1 11 -0.029602

5 rows × 45 columns

In [27]:
cluster_meta[['labels', 'cluster_id', 'original_cluster_id', 'manual_sort_order', 'within_cluster_correlation']].head()
Out[27]:
labels cluster_id original_cluster_id manual_sort_order within_cluster_correlation
cell_specimen_id
1086492406 5 5 5 7 0.711864
1086492307 5 5 5 7 0.948394
1086492221 4 7 7 3 0.857523
1086492174 5 5 5 7 0.755901
1086491936 3 1 1 11 -0.029602
In [ ]:
 

plot co-clustering matrix and dendrograms sorted by linkage matrix

In [28]:
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
In [31]:
# for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
#     plotting.plot_coclustering_matrix_and_dendrograms(coclustering_matrices, cre_line, cluster_meta, n_clusters_cre, 
#                                                      save_dir=base_dir, folder=folder)
In [ ]:
# data = coclustering_matrices[cre_line]

# fig, ax = plt.subplots(figsize=(8,8))
# ax = sns.clustermap(data=data, cmap='Greys', cbar_kws={'label':'co-clustering probability'})

sort coclustering matrix by cluster ID / cluster size

In [80]:
cre_lines = processing.get_cre_lines(cell_metadata)
for cre_line in cre_lines: 
    plotting.plot_coclustering_matrix_sorted_by_cluster_size(coclustering_matrices, cluster_meta, cre_line, 
                                                    save_dir=None, folder=folder, ax=None)

plot average dropout scores for each cluster

plot each cluster separately and save to single cell examples dir

In [ ]:
cell_examples_dir = os.path.join(save_dir, 'matched_cell_examples')
if not os.path.exists(cell_examples_dir):
    os.mkdir(cell_examples_dir)
In [40]:
plotting.plot_dropout_heatmaps_and_save_to_cell_examples_folders(cluster_meta, feature_matrix, save_dir)
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\plotting.py:417: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig, ax = plt.subplots(figsize=figsize)

plot average dropouts for each cre line in cluster size order

In [42]:
sort_col = 'cluster_id'

plotting.plot_dropout_heatmaps_for_clusters(cluster_meta, feature_matrix, sort_col=sort_col, save_dir=base_dir, folder=folder)

plot dropout heatmaps in manually sorted order

In [43]:
sort_col = 'manual_sort_order'

plotting.plot_dropout_heatmaps_for_clusters(cluster_meta, feature_matrix, sort_col=sort_col, save_dir=base_dir, folder=folder)
In [ ]:
 

plot feature matrix sorted by cluster ID

In [45]:
plotting.plot_feature_matrix_sorted(feature_matrix, cluster_meta, sort_col='cluster_id', save_dir=base_dir, folder=folder)
    
In [47]:
plotting.plot_feature_matrix_sorted(feature_matrix, cluster_meta, sort_col='manual_sort_order', save_dir=base_dir, folder=folder)
    

umap with cluster labels

In [49]:
# for column in ['project_code', 'binned_depth', 'targeted_structure', 'mouse_id']:
label_col = 'cluster_id'

plotting.plot_umap_for_clusters(cluster_meta, feature_matrix, label_col=label_col, save_dir=base_dir, folder=folder)

Correlations within clusters

In [145]:
plotting.plot_within_cluster_correlations_all_cre(cluster_meta, n_clusters_cre, sort_order=None, save_dir=base_dir, folder=folder)
In [144]:
sort_order = processing.get_manual_sort_order()

plotting.plot_within_cluster_correlations_all_cre(cluster_meta, n_clusters_cre, sort_order=sort_order, 
                                                  suffix='_manual_sort', save_dir=base_dir, folder=folder)

average dropouts per cre line

In [59]:
plotting.plot_average_dropout_heatmap_for_cre_lines(dropouts, cluster_meta, save_dir=base_dir, folder=folder)
In [ ]:
 

plot 100 cells per cluster to examine within cluster variability

In [78]:
plotting.plot_random_subset_of_cells_per_cluster(cluster_meta, dropouts, save_dir)
49 cells in cluster 2
selecting a random subset of 49
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-78-ffa557462157> in <module>
----> 1 plotting.plot_random_subset_of_cells_per_cluster(cluster_meta, dropouts, save_dir)

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\plotting.py in plot_random_subset_of_cells_per_cluster(cluster_meta, dropouts, save_dir)
   1120             fig.tight_layout()
   1121             fig.subplots_adjust(wspace=0.3)
-> 1122             fig.suptitle(get_cell_type_for_cre_line(cre_line)+' cluster '+str(cluster_id), x=0.5, y=1.01)
   1123             filename = cre_line+'_cluster_'+str(cluster_id)
   1124             if save_dir:

TypeError: get_cell_type_for_cre_line() missing 1 required positional argument: 'cre_line'

breakdown by area and depth

We are going to normalize within each area or depth to account for the large imbalance in N due to Scientifica datasets only being performed in V1 at certain depths, as well as biological variation in cell type specific expression by depth

plot fraction cells by area and depth

In [88]:
label = 'fraction of cells'
plotting.plot_fraction_cells_by_area_depth(cluster_meta, n_clusters_cre, normalize=True, label=label, 
                                           save_dir=base_dir, folder=folder)
In [87]:
label = '# of cells'
plotting.plot_fraction_cells_by_area_depth(cluster_meta, n_clusters_cre, normalize=False, label=label, 
                                           save_dir=base_dir, folder=folder)

compute % cells per area / depth relative to chance

to deal with uneven sampling across areas & depths, we will express the fraction of cells per cluster in each area & depth as a percentage relative to chance. We can compute the expected number of cells per area and depth in each cluster based on random sampling of our area/depth distribution, then compute how many cells are actually in each area and depth per cluster and express that as a % relative to chance

To compute the expected number of cells in each cluster based on sampling:

* take size of cluster (n_neurons) and select a random set of cells of that size
* repeat 100x to get a distribution of random cells
* compute the number of cells in each area and depth in the random distribution
* quantify how many cells are actually in each area and depth in the clustered data
* compute significance of actual # cells relative to random distrubution 
* report % cells relative to chance and p_value
In [90]:
cell_count_stats = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['targeted_structure', 'layer'])
In [91]:
cell_count_stats.head()[cell_count_stats.keys()[:16]]
Out[91]:
targeted_structure layer cre_line n_cells_total_cre n_cells_cond_cre fraction_per_cond_cre cluster_id n_cells_total_cluster n_cells_cond_cluster fraction_per_cond_cluster n_cells random_n_cells random_n_cells_mean fraction_of_random relative_to_random n_times_actual_greater_than_random
0 VISl lower Slc17a7-IRES2-Cre 3302 263 0.079649 1 816 55 0.067402 55 [60, 74, 64, 72, 51, 73, 60, 79, 76, 76, 72, 6... 65.27 0.842654 -0.157346 10.0
1 VISl upper Slc17a7-IRES2-Cre 3302 328 0.099334 1 816 134 0.164216 134 [79, 71, 83, 77, 79, 78, 88, 91, 70, 75, 89, 8... 80.58 1.662944 0.662944 100.0
2 VISp lower Slc17a7-IRES2-Cre 3302 1600 0.484555 1 816 244 0.299020 244 [397, 406, 392, 397, 383, 394, 403, 395, 395, ... 395.32 0.617221 -0.382779 0.0
3 VISp upper Slc17a7-IRES2-Cre 3302 1111 0.336463 1 816 383 0.469363 383 [280, 265, 277, 270, 303, 271, 265, 251, 275, ... 274.83 1.393589 0.393589 100.0
0 VISl lower Slc17a7-IRES2-Cre 3302 263 0.079649 2 585 46 0.078632 46 [47, 42, 49, 50, 52, 42, 35, 41, 53, 47, 42, 4... 45.66 1.007446 0.007446 51.0
In [677]:
# save stats
cell_count_stats.to_csv(os.path.join(save_dir, 'cell_count_stats.csv'))

heatmap of % rel chance

fraction of cells relative to random distribution

In [98]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range'

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre, 
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=None, suffix=suffix, save_dir=base_dir, folder=folder)
In [97]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_full_range'

vmax = cell_count_stats.relative_to_random.max()
vmin = cell_count_stats.relative_to_random.min()

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre, 
                                             value_to_plot, cbar_label, cmap='PRGn', vmin=-vmax, vmax=vmax, 
                                             cluster_order=None, save_dir=base_dir, folder=folder, suffix=suffix)
In [99]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_full_range_manual_sort_order'

vmax = cell_count_stats.relative_to_random.max()
vmin = cell_count_stats.relative_to_random.min()

cluster_order = processing.get_manual_sort_order()

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre,
                                             value_to_plot, cbar_label, cmap='PRGn', vmin=-vmax, vmax=vmax, 
                                             cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)

sort clusters by the fraction of cells in VISp upper

In [100]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_full_range_VISp_upper_sort'

vmax = cell_count_stats.relative_to_random.max()
vmin = cell_count_stats.relative_to_random.min()

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, 
                                                                 location='VISp_upper', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-vmax, vmax=vmax, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)
In [101]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range_VISp_upper_sort'

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, 
                                                                 location='VISp_upper', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)

plot dropouts in this order

In [ ]:
plotting.plot_dropout_heatmaps_for_clusters_sorted(cluster_meta, feature_matrix, cluster_order=cluster_order, 
                                       save_dir=base_dir, folder=folder, sort_type='VISp_upper_pct_rel_chance')

try for just V1 vs LM or upper vs lower

In [271]:
cell_count_stats_area = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['targeted_structure'])

value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range_area_sort'

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
                                                                 location='VISp', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats_area, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)
In [272]:
plotting.plot_dropout_heatmaps_for_clusters_sorted(cluster_meta, feature_matrix, cluster_order=cluster_order, 
                                       save_dir=base_dir, folder=folder, sort_type='area_sort')

depths

In [278]:
cell_count_stats_area = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['binned_depth'])

value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range_binned_depth_sort'

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
                                                                 location=175, metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats_area, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, 
                                                      suffix=suffix)
In [279]:
plotting.plot_dropout_heatmaps_for_clusters_sorted(cluster_meta, feature_matrix, cluster_order=cluster_order, 
                                       save_dir=base_dir, folder=folder, sort_type='binned_depth_sort')

layers

In [280]:
cell_count_stats_area = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['layer'])

value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range_layer_sort'

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
                                                                 location='upper', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats_area, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, 
                                                      suffix=suffix)
In [281]:
plotting.plot_dropout_heatmaps_for_clusters_sorted(cluster_meta, feature_matrix, cluster_order=cluster_order, 
                                       save_dir=base_dir, folder=folder, sort_type='upper_pct_rel_chance')

test plot components then plot clusters with other information as additional panels

plot clusters with % cells per area and depth

In [110]:
fraction_cells = processing.get_fraction_cells_per_area_depth(cluster_meta)
cre_fraction = fraction_cells[fraction_cells.cre_line==cre_line]
In [111]:
plotting.plot_fraction_cells_per_area_depth(cre_fraction, 1, ax=None)
Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x29f0857fac8>

get relevant data and plot clusters with fractions

In [112]:
cell_count_stats = processing.get_cell_count_stats(cluster_meta)
cre_counts = cell_count_stats[cell_count_stats.cre_line==cre_line]
In [115]:
cluster_id = 1
plotting.plot_pct_rel_to_chance_for_cluster(cre_counts, cluster_id, ax=None)
Out[115]:
<matplotlib.axes._subplots.AxesSubplot at 0x29f01a4cfd0>

population average trace

In [116]:
# load dataframe with response traces
df_name = 'omission_response_df'
conditions = ['cell_specimen_id', 'omitted']

data_type = 'events'
event_type = 'omissions'
inclusion_criteria = 'active_only_closest_familiar_and_novel_containers_with_all_levels'


multi_session_df = loading.get_multi_session_df_for_conditions(data_type, event_type, conditions, inclusion_criteria)
print(len(multi_session_df.ophys_experiment_id.unique()))
there are 1885 experiments in the full multi_session_df
there are 1249 experiments in the multi_session_df after limiting to platform experiments
there are 402 experiments after filtering for inclusion criteria -  active_only_closest_familiar_and_novel_containers_with_all_levels
402
In [118]:
cluster_mdf = multi_session_df.merge(cluster_meta[['cluster_id']], 
                                     on='cell_specimen_id', how = 'inner')
In [119]:
plotting.plot_population_average_response_for_cluster(cluster_mdf, cre_line, cluster_id, change=False, omitted=True, ax=None)
Out[119]:
<matplotlib.axes._subplots.AxesSubplot at 0x29f01a439b0>

plot clusters, fraction per area depth, and pop avgs as rows

In [220]:
# get fraction cells relative to chance per cluster per cre_line
cell_count_stats = processing.get_cell_count_stats(cluster_meta)
# get fraction of cells per area/depth per cluster per cre_line 
fraction_cells = processing.get_fraction_cells_per_area_depth(cluster_meta)
In [221]:
for cre_line in cre_lines: 
    plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
                                              cell_count_stats, fraction_cells, cre_line, 
                                              sort_order=None, suffix='_size_order', 
                                              save_dir=base_dir, folder=folder, )

use manual sort order

In [222]:
# manual_sort_order = processing.get_manual_sort_order()

# for cre_line in cre_lines: 
#     plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
#                                               cell_count_stats, fraction_cells, cre_line, 
#                                               sort_order=manual_sort_order, suffix='_manual_sort', 
#                                               save_dir=base_dir, folder=folder, )

sort by visp upper

In [241]:
# get fraction cells relative to chance per cluster per cre_line
cell_count_stats = processing.get_cell_count_stats(cluster_meta)
# get fraction of cells per area/depth per cluster per cre_line 
fraction_cells = processing.get_fraction_cells_per_area_depth(cluster_meta)
In [242]:
cell_count_stats['location'] = cell_count_stats.targeted_structure+'_'+cell_count_stats.layer
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  

for cre_line in cre_lines: 
    plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
                                              cell_count_stats, fraction_cells, cre_line, 
                                              sort_order=cluster_order, suffix='_VISp_upper_sort', 
                                              save_dir=base_dir, folder=folder, )

sort by V1 vs LM order

In [251]:
cell_count_stats_area = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['targeted_structure'])

value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range_area_sort'

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
                                                                 location='VISp', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats_area, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)
In [292]:
# cell_count_stats_area['location'] = cell_count_stats_area.targeted_structure
# # need to have both a 'layer' and 'targeted_structure' column for plot to work
# cell_count_stats_area['layer'] = cell_count_stats_area.targeted_structure

# cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
#                                                                  location='VISp', metric='relative_to_random')  

# for cre_line in cre_lines: 
#     plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
#                                               cell_count_stats_area, fraction_cells, cre_line, 
#                                               sort_order=cluster_order, suffix='_area_sort', 
#                                               save_dir=base_dir, folder=folder, )
In [ ]:
 
In [ ]:
 
In [ ]:
 

plot as columns

In [227]:
# for cre_line in cre_lines: 
#     plotting.plot_clusters_stats_pop_avg_cols(cluster_meta, feature_matrix, multi_session_df,
#                                               cell_count_stats, fraction_cells, cre_line, 
#                                               sort_order=cluster_order, suffix='_VISp_upper_sort', 
#                                               save_dir=base_dir, folder=folder, )

within cluster correlations sorted by VISp upper

In [147]:
sort_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  

plotting.plot_within_cluster_correlations_all_cre(cluster_meta, n_clusters_cre, sort_order=sort_order, 
                                                  suffix='_VISp_upper_sort', save_dir=base_dir, folder=folder)
In [ ]:
 
In [ ]:
 
In [ ]:
 

Compute selectivity metrics on dropout scores

define stats

compute stats across cells

  • positive value of exp_mod_direction means stronger coding of pref feature in novel session
  • positive value of exp_mod_persistence means similar coding in Novel 1 and Novel >1 for pref feature
  • high value of feature_sel_within_session means highly feature selective in strongest exp level
  • low value of feature_sel_within_session means similar strength of coding for multiple features in a given session
  • high value of feature_sel_across sessions means there is a big difference between the strength of coding for the preferred feature in the preferred experience level and a different feature in a different experience level
  • low value of feature_sel_across sessions means similar coding across two different features in two different sessions
  • cell switching is indicated by low feature_sel_across_sessions and high experience selectivity
  • lack of coding overall would be low exp selectivity and low feature_sel_across_sessions
In [230]:
cell_stats = pd.DataFrame()
for i, cell_specimen_id in enumerate(cluster_meta.index.values):
    cell_dropouts = dropouts[dropouts.cell_specimen_id==cell_specimen_id].groupby('experience_level').mean()[features]
    stats = processing.get_coding_metrics(index_dropouts=cell_dropouts, index_value=cell_specimen_id, index_name='cell_specimen_id')
    cell_stats = pd.concat([cell_stats, stats], sort=False)
cell_stats = cell_stats.merge(cluster_meta, on='cell_specimen_id')
metrics = stats.keys()[-6:]
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:926: RuntimeWarning: invalid value encountered in double_scalars
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:929: RuntimeWarning: divide by zero encountered in double_scalars
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:929: RuntimeWarning: invalid value encountered in double_scalars
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:916: RuntimeWarning: invalid value encountered in double_scalars
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:920: RuntimeWarning: invalid value encountered in double_scalars
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:939: RuntimeWarning: invalid value encountered in double_scalars
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:952: RuntimeWarning: invalid value encountered in double_scalars

average stats across cells per cluster

In [231]:
avg_cluster_stats = cell_stats.groupby(['cre_line', 'cluster_id']).mean()
avg_cluster_stats = avg_cluster_stats[list(metrics)]
avg_cluster_stats = avg_cluster_stats.reset_index()
n_cells = cell_stats.groupby(['cre_line', 'cluster_id']).count()[['labels']].rename(columns={'labels':'n_cells'})
avg_cluster_stats = avg_cluster_stats.merge(n_cells, on=['cre_line', 'cluster_id'])

compute stats on each cluster directly

In [232]:
cluster_stats = pd.DataFrame()
for cre_line in cre_lines:
    # get cell specimen ids for this cre line
    cre_cell_specimen_ids = cluster_meta[cluster_meta.cre_line==cre_line].index.values
    # get cluster labels dataframe for this cre line
    cre_cluster_ids = cluster_meta.loc[cre_cell_specimen_ids]
    # get unique cluster labels for this cre line
    cluster_labels = np.sort(cre_cluster_ids.cluster_id.unique())
    # limit dropouts df to cells in this cre line
    feature_matrix_cre = feature_matrix.loc[cre_cell_specimen_ids]
    for i, cluster_id in enumerate(cluster_labels):
        # get cell specimen ids in this cluster in this cre line
        this_cluster_csids = cre_cluster_ids[cre_cluster_ids['cluster_id'] == cluster_id].index.values
        # get dropout scores for cells in this cluster in this cre line
        mean_dropout_df = np.abs(feature_matrix_cre.loc[this_cluster_csids].mean().unstack())
        stats = processing.get_coding_metrics(index_dropouts=mean_dropout_df.T, index_value=cluster_id, index_name='cluster_id')
        fraction_cre = len(this_cluster_csids) / float(len(cre_cell_specimen_ids))
        stats['fraction_cre'] = fraction_cre
        stats['cre_line'] = cre_line
        stats['F_max'] = mean_dropout_df['Familiar'].max()
        stats['N1_max'] = mean_dropout_df['Novel 1'].max()
        stats['N2_max'] = mean_dropout_df['Novel >1'].max()
        cluster_stats = pd.concat([cluster_stats, stats])
cluster_stats = cluster_stats.reset_index()
n_cells = cell_stats.groupby(['cre_line', 'cluster_id']).count()[['labels']].rename(columns={'labels':'n_cells_cluster'})
cluster_stats = cluster_stats.merge(n_cells, on=['cre_line', 'cluster_id'])
In [ ]:
 

merge cluster stats with cell counts per cluster

In [149]:
# create location column merging area and depth
cell_count_stats['location'] = cell_count_stats.targeted_structure+'_'+cell_count_stats.layer
# group & unstack to get fraction relative to random for each location as columns
fraction_cells = cell_count_stats[['cre_line', 'cluster_id', 'location', 'relative_to_random']].groupby(['cre_line', 'cluster_id', 'location']).mean().unstack()
# get rid of multi-index column
fraction_cells.columns = fraction_cells.columns.droplevel()
# merge fraction cells per location with other cluster metrics
cluster_stats = fraction_cells.reset_index().merge(cluster_stats, on=['cre_line', 'cluster_id'])
In [150]:
cluster_stats.head()
Out[150]:
cre_line cluster_id VISl_lower VISl_upper VISp_lower VISp_upper dominant_feature dominant_experience_level next_highest_conditions feature_selectivity experience_selectivity exp_mod_direction exp_mod_persistence feature_sel_within_session feature_sel_across_sessions fraction_cre F_max N1_max N2_max n_cells_cluster
0 Slc17a7-IRES2-Cre 1 -0.156830 0.662944 -0.384553 0.399189 task Novel 1 (Familiar, behavioral) 0.357109 0.243331 0.227614 0.587987 0.254160 0.267889 0.247123 0.013037 0.020721 0.012184 816
1 Slc17a7-IRES2-Cre 2 -0.013722 -0.338983 0.357097 -0.406513 all-images Novel >1 (Familiar, task) 0.895500 0.002784 -0.003099 1.008709 0.845088 0.829065 0.177165 0.864070 0.858731 0.866210 585
2 Slc17a7-IRES2-Cre 3 -0.294060 0.083104 -0.107539 0.198778 all-images Novel 1 (Novel >1, task) 0.926534 0.327805 0.973665 0.999147 0.898357 0.893041 0.155360 0.025168 0.893832 0.893069 513
3 Slc17a7-IRES2-Cre 4 -0.283032 -0.050360 -0.142199 0.286492 all-images Novel 1 (Familiar, task) 0.930123 0.980790 0.971493 0.004937 0.889282 0.963623 0.108722 0.016537 0.892661 0.016455 359
4 Slc17a7-IRES2-Cre 5 1.681118 -0.125109 0.162162 -0.582300 all-images Familiar (Novel >1, behavioral) 0.483975 0.058112 -0.021128 0.857172 0.231036 0.019346 0.069049 0.458800 0.439814 0.441385 228
In [ ]:
 

plot each relationship on its own fig

In [152]:
# for col_x in cluster_stats.columns[2:]:
#     for col_y in cluster_stats.columns[2:]:
#         try:
#             figsize=(4,4)
#             fig, ax = plt.subplots(figsize=figsize)
#             ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
#                                 hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
#             ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
#             ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
#             ax.get_legend().remove()
#             plt.axis("equal")
#             for i in range(len(cluster_stats[col_x].values)):
#                 xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#                 color_index = cluster_stats['color_index'].values[i]
#                 cluster_id = cluster_stats['cluster_id'].values[i]
#                 plt.annotate(cluster_id, xy, color=colors[color_index])
#             utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
#         except:
#             pass

barplots of metric values for clusters

In [162]:
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  
n_clusters = [n_clusters_cre[cre] for cre in cre_lines]

for metric in cluster_stats.columns[2:]:
    try:
        figsize=(15,2.5)
        fig, ax = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios':n_clusters}, sharey=True)
        for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
            data = cluster_stats[cluster_stats.cre_line==cre_line]
            ax[i] = sns.barplot(data=data, x='cluster_id', order=cluster_order[cre_line], y=metric, 
                                hue='VISp_upper', palette='PRGn', ax=ax[i], dodge=False)
            ax[i].set_title(processing.get_cell_type_for_cre_line(cluster_meta, cre_line))
            ax[i].get_legend().remove()
        plt.subplots_adjust(wspace=0.4)
        utils.save_figure(fig, figsize, save_dir, 'pointplots_VISp_upper_sort', metric)
    except:
        print('problem for', metric)
problem for next_highest_conditions
In [ ]:
 

relationship of across session switching and Familiar session max to % cells in VISp or VISl

In [176]:
col_x = 'feature_sel_across_sessions'
col_y = 'VISp_upper'

colors = utils.get_cre_line_colors()

figsize=(4,4)
fig, ax = plt.subplots(figsize=figsize)
ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
                    hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
ax.get_legend().remove()
plt.axis("equal")
# for i in range(len(cluster_stats[col_x].values)):
#     xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#     color_index = cluster_stats['color_index'].values[i]
#     cluster_id = cluster_stats['cluster_id'].values[i]
#     plt.annotate(cluster_id, xy, color=colors[color_index])
utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
In [177]:
col_x = 'F_max'
col_y = 'VISp_upper'

colors = utils.get_cre_line_colors()

figsize=(4,4)
fig, ax = plt.subplots(figsize=figsize)
ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
                    hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
ax.get_legend().remove()
plt.axis("equal")
# for i in range(len(cluster_stats[col_x].values)):
#     xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#     color_index = cluster_stats['color_index'].values[i]
#     cluster_id = cluster_stats['cluster_id'].values[i]
#     plt.annotate(cluster_id, xy, color=colors[color_index])
utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
In [179]:
col_x = 'F_max'
col_y = 'VISl_upper'

colors = utils.get_cre_line_colors()

figsize=(4,4)
fig, ax = plt.subplots(figsize=figsize)
ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
                    hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
ax.get_legend().remove()
plt.axis("equal")
# for i in range(len(cluster_stats[col_x].values)):
#     xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#     color_index = cluster_stats['color_index'].values[i]
#     cluster_id = cluster_stats['cluster_id'].values[i]
#     plt.annotate(cluster_id, xy, color=colors[color_index])
utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
In [ ]:
 

boxplots of metric values for all cells in each cluster

first merge with fraction cells info per cluster

In [233]:
# create location column merging area and depth
cell_count_stats['location'] = cell_count_stats.targeted_structure+'_'+cell_count_stats.layer
# group & unstack to get fraction relative to random for each location as columns
fraction_cells = cell_count_stats[['cre_line', 'cluster_id', 'location', 'relative_to_random']].groupby(['cre_line', 'cluster_id', 'location']).mean().unstack()
# get rid of multi-index column
fraction_cells.columns = fraction_cells.columns.droplevel()
# merge fraction cells per location with other cluster metrics
cell_stats_loc = fraction_cells.reset_index().merge(cell_stats.reset_index(), on=['cre_line', 'cluster_id'])
cell_stats_loc = cell_stats_loc.set_index('cell_specimen_id')
In [235]:
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  
n_clusters = [n_clusters_cre[cre] for cre in cre_lines]

for metric in cluster_stats.columns[2:]:
    try:
        figsize=(15,2.5)
        fig, ax = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios':n_clusters}, sharey=True)
        for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
            data = cell_stats_loc[cell_stats_loc.cre_line==cre_line]
            ax[i] = sns.boxplot(data=data, x='cluster_id', order=cluster_order[cre_line], y=metric, 
                                hue='VISp_upper', palette='PRGn', ax=ax[i], dodge=False)
            ax[i].set_title(processing.get_cell_type_for_cre_line(cluster_meta, cre_line))
            ax[i].get_legend().remove()
        plt.subplots_adjust(wspace=0.4)
        utils.save_figure(fig, figsize, save_dir, 'boxplots_VISp_upper_sort', metric)
    except:
        print('problem for', metric)
problem for next_highest_conditions
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
problem for fraction_cre
problem for F_max
problem for N1_max
problem for N2_max
problem for n_cells_cluster
In [ ]:
 

boxplots using V1/LM or upper/lower

In [282]:
cell_count_stats_area = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['targeted_structure'])
In [285]:
# create location column merging area and depth
cell_count_stats_area['location'] = cell_count_stats_area.targeted_structure
# group & unstack to get fraction relative to random for each location as columns
fraction_cells = cell_count_stats_area[['cre_line', 'cluster_id', 'location', 'relative_to_random']].groupby(['cre_line', 'cluster_id', 'location']).mean().unstack()
# get rid of multi-index column
fraction_cells.columns = fraction_cells.columns.droplevel()
# merge fraction cells per location with other cluster metrics
cell_stats_loc = fraction_cells.reset_index().merge(cell_stats.reset_index(), on=['cre_line', 'cluster_id'])
cell_stats_loc = cell_stats_loc.set_index('cell_specimen_id')
In [288]:
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
                                                                 location='VISp', metric='relative_to_random')  
n_clusters = [n_clusters_cre[cre] for cre in cre_lines]

for metric in cluster_stats.columns[2:]:
    try:
        figsize=(15,2.5)
        fig, ax = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios':n_clusters}, sharey=True)
        for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
            data = cell_stats_loc[cell_stats_loc.cre_line==cre_line]
            ax[i] = sns.boxplot(data=data, x='cluster_id', order=cluster_order[cre_line], y=metric, 
                                hue='VISp', palette='PRGn', ax=ax[i], dodge=False)
            ax[i].set_title(processing.get_cell_type_for_cre_line(cluster_meta, cre_line))
            ax[i].get_legend().remove()
        plt.subplots_adjust(wspace=0.4)
        utils.save_figure(fig, figsize, save_dir, 'boxplots_VISp_sort', metric)
    except:
        print('problem for', metric)
problem for next_highest_conditions
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
problem for fraction_cre
problem for F_max
problem for N1_max
problem for N2_max
problem for n_cells_cluster

layer

In [289]:
cell_count_stats_area = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['layer'])
In [290]:
# create location column merging area and depth
cell_count_stats_area['location'] = cell_count_stats_area.layer
# group & unstack to get fraction relative to random for each location as columns
fraction_cells = cell_count_stats_area[['cre_line', 'cluster_id', 'location', 'relative_to_random']].groupby(['cre_line', 'cluster_id', 'location']).mean().unstack()
# get rid of multi-index column
fraction_cells.columns = fraction_cells.columns.droplevel()
# merge fraction cells per location with other cluster metrics
cell_stats_loc = fraction_cells.reset_index().merge(cell_stats.reset_index(), on=['cre_line', 'cluster_id'])
cell_stats_loc = cell_stats_loc.set_index('cell_specimen_id')
In [291]:
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats_area, cluster_meta, 
                                                                 location='upper', metric='relative_to_random')  
n_clusters = [n_clusters_cre[cre] for cre in cre_lines]

for metric in cluster_stats.columns[2:]:
    try:
        figsize=(15,2.5)
        fig, ax = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios':n_clusters}, sharey=True)
        for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
            data = cell_stats_loc[cell_stats_loc.cre_line==cre_line]
            ax[i] = sns.boxplot(data=data, x='cluster_id', order=cluster_order[cre_line], y=metric, 
                                hue='upper', palette='PRGn', ax=ax[i], dodge=False)
            ax[i].set_title(processing.get_cell_type_for_cre_line(cluster_meta, cre_line))
            ax[i].get_legend().remove()
        plt.subplots_adjust(wspace=0.4)
        utils.save_figure(fig, figsize, save_dir, 'boxplots_upper_sort', metric)
    except:
        print('problem for', metric)
problem for next_highest_conditions
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning: invalid value encountered in multiply
  x2 = take(ap, indices_above, axis=axis) * weights_above
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1291: RuntimeWarning: invalid value encountered in double_scalars
  stats['iqr'] = q3 - q1
problem for fraction_cre
problem for F_max
problem for N1_max
problem for N2_max
problem for n_cells_cluster
In [ ]:
 
In [ ]:
 

Summary plots experimentation

In [ ]:
 

plot experience mod values for cluster average

In [773]:
figsize = (17,5)
fig, ax = plt.subplots(1,3, figsize=figsize)
for i, cre_line in enumerate(cre_lines):
    data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    print(n_clusters)
    ax[i] = sns.scatterplot(data=data, x='exp_mod_direction', y='exp_mod_persistence', 
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
                            size='n_cells', sizes=(0, 1000), ax=ax[i])
    ax[i].axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
    ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
    ax[i].set_title(cell_types[cre_line]+'\n experience modulation')
    ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
    ax[i].set_xlabel('direction')
    ax[i].set_ylabel('persistence')
    ax[i].set_xlim(-1.2,1.2)
    ax[i].set_ylim(-1.2,1.2)
fig.tight_layout()
# utils.save_figure(fig, figsize, base_dir, folder, 'experience_modulation_avg_across_cells_in_cluster')
10
6
10

cells and clusters together

In [772]:
figsize = (15,5)
fig, ax = plt.subplots(1,3, figsize=figsize)
for i, cre_line in enumerate(cre_lines):
    data = cell_stats[cell_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    ax[i] = sns.scatterplot(data=data, x='exp_mod_direction', y='exp_mod_persistence', alpha=0.2,
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], ax=ax[i])
    
    data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    ax[i] = sns.scatterplot(data=data, x='exp_mod_direction', y='exp_mod_persistence', 
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
                            size='n_cells', sizes=(0, 1000), ax=ax[i], alpha=0.7)
    
    ax[i].axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
    ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
    ax[i].set_title(cell_types[cre_line]+'\n experience modulation')
    ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
    ax[i].set_xlabel('direction')
    ax[i].set_ylabel('persistence')
    ax[i].set_xlim(-1.2,1.2)
    ax[i].set_ylim(-1.2,1.2)
fig.tight_layout()
# utils.save_figure(fig, figsize, base_dir, folder, 'experience_modulation_across_cells_and_clusters')
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\ipykernel_launcher.py:27: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all axes decorations. 
In [ ]:
 

feature selectivity within and across sesions

In [216]:
# figsize = (15,5)
# fig, ax = plt.subplots(1,3, figsize=figsize)
# for i, cre_line in enumerate(cre_lines):
#     data = cell_stats[cell_stats.cre_line==cre_line]
#     data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
#     cluster_order = np.sort(data.cluster_id.unique())
#     n_clusters = len(cluster_order)
#     ax[i] = sns.scatterplot(data=data, x='feature_sel_within_session', y='feature_sel_across_sessions', alpha=0.2,
#                             hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], ax=ax[i])
    
#     data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
#     data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
#     cluster_order = np.sort(data.cluster_id.unique())
#     n_clusters = len(cluster_order)
#     ax[i] = sns.scatterplot(data=data, x='feature_sel_within_session', y='feature_sel_across_sessions', 
#                             hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
#                             size='n_cells', sizes=(0, 1000), ax=ax[i], alpha=0.7)
    
# #     ax[i].axvline(x=0, ymin=0, ymax=1, linestyle='--', color='gray')
# #     ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
#     ax[i].set_title(cell_types[cre_line]+'\n feature selectivity')
#     ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
#     ax[i].set_xlabel('within session')
#     ax[i].set_ylabel('across sessions')
#     ax[i].set_xlim(-0.1,1.1)
#     ax[i].set_ylim(-0.1,1.1)
# fig.tight_layout()
# # utils.save_figure(fig, figsize, base_dir, folder, 'feature_selectivity_across_cells_and_clusters')

feature selectivity across sessions vs. experience selectivity

  • cell switching is indicated by low feature_sel_across_sessions and high experience selectivity
In [213]:
figsize = (17,5)
fig, ax = plt.subplots(1,3, figsize=figsize)
for i, cre_line in enumerate(cre_lines):
    data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    print(n_clusters)
    ax[i] = sns.scatterplot(data=data, x='experience_selectivity', y='feature_sel_across_sessions', 
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
                            size='n_cells', sizes=(0, 1000), ax=ax[i])
#     ax[i].axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
#     ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
    ax[i].set_title(cell_types[cre_line])
    ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
    ax[i].set_xlabel('experience modulation')
    ax[i].set_ylabel('feature selectivity across sessions')
    ax[i].set_xlim(0, 1.1)
    ax[i].set_ylim(0, 1.1)
fig.tight_layout()
# utils.save_figure(fig, figsize, base_dir, folder, 'feature_sel_exp_mod_avg_across_cells_in_cluster')
9
5
9
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

summarize

In [192]:
n_clusters_per_cre = cell_stats.groupby(['cre_line']).count().rename(columns={'cluster_id':'n_cells_total'})[['n_cells_total']]
n_clusters_per_feature = cell_stats.groupby(['cre_line', 'dominant_feature']).count().rename(columns={'cluster_id':'n_cells'})[['n_cells']]
n_clusters_per_feature = n_clusters_per_feature.reset_index().merge(n_clusters_per_cre, on='cre_line')
n_clusters_per_feature['fraction_cells'] = n_clusters_per_feature['n_cells']/n_clusters_per_feature['n_cells_total']
In [193]:
colors = utils.get_cre_line_colors()[::-1]

figsize=(4.5,3)
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(data=n_clusters_per_feature, x='dominant_feature', y='fraction_cells', hue='cre_line',
             palette=colors, order=features, hue_order=cre_lines, ax=ax)
ax.legend(fontsize='xx-small', title='', loc='upper right')
ax.set_ylabel('fraction of cells')
ax.set_xlabel('')
ax.set_title('feature preference')
ax.set_xticklabels(features, rotation=45)
Out[193]:
[Text(0, 0, 'all-images'),
 Text(0, 0, 'omissions'),
 Text(0, 0, 'behavioral'),
 Text(0, 0, 'task')]
In [194]:
n_clusters_per_cre = cell_stats.groupby(['cre_line']).count().rename(columns={'cluster_id':'n_cells_total'})[['n_cells_total']]
n_clusters_per_feature = cell_stats.groupby(['cre_line', 'dominant_experience_level']).count().rename(columns={'cluster_id':'n_cells'})[['n_cells']]
n_clusters_per_feature = n_clusters_per_feature.reset_index().merge(n_clusters_per_cre, on='cre_line')
n_clusters_per_feature['fraction_cells'] = n_clusters_per_feature['n_cells']/n_clusters_per_feature['n_cells_total']
In [195]:
colors = utils.get_cre_line_colors()[::-1]
experience_levels = np.sort(cluster_meta.experience_level.unique())

figsize=(4.5,3)
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(data=n_clusters_per_feature, x='dominant_experience_level', y='fraction_cells', hue='cre_line',
             palette=colors, order=experience_levels, hue_order=cre_lines, ax=ax)
ax.legend(fontsize='xx-small', title='', loc='upper right')
ax.set_ylabel('fraction of cells')
ax.set_xlabel('')
ax.set_title('experience level preference')
ax.set_xticklabels(experience_levels, rotation=45);

repeat but per cluster instead of across cells

In [189]:
cell_stats = cluster_meta.copy()
for i, cell_specimen_id in enumerate(cell_stats.index.values):
    # get dropout scores per cell 
    cell_dropouts = dropouts[dropouts.cell_specimen_id==cell_specimen_id].groupby('experience_level').mean()[features]
    # get preferred regressor and experience level and save
    dominant_feature = cell_dropouts.stack().idxmax()[1]
    dominant_experience_level = cell_dropouts.stack().idxmax()[0]
    cell_stats.loc[cell_specimen_id, 'dominant_feature'] = dominant_feature
    cell_stats.loc[cell_specimen_id, 'dominant_experience_level'] = dominant_experience_level
    # get selectivity for feature & experience level 
    # feature selectivity is ratio of largest and next largest dropouts for the dominant experience level
    order = np.argsort(cell_dropouts.loc[dominant_experience_level])
    values = cell_dropouts.loc[dominant_experience_level].values[order[::-1]]
    feature_selectivity = (values[0]-(np.mean(values[1:])))/(values[0]+(np.mean(values[1:])))
    # experience selectivity is ratio of largest and next largest dropouts for the dominant feature
    order = np.argsort(cell_dropouts[dominant_feature])
    values = cell_dropouts[dominant_feature].values[order[::-1]]
    experience_selectivity = (values[0]-(np.mean(values[1:])))/(values[0]+(np.mean(values[1:])))
    cell_stats.loc[cell_specimen_id, 'feature_selectivity'] = feature_selectivity
    cell_stats.loc[cell_specimen_id, 'experience_selectivity'] = experience_selectivity
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\ipykernel_launcher.py:14: RuntimeWarning: invalid value encountered in double_scalars
  
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\ipykernel_launcher.py:18: RuntimeWarning: invalid value encountered in double_scalars
In [202]:
cluster_stats.keys()
Out[202]:
Index(['cre_line', 'cluster_id', 'VISl_lower', 'VISl_upper', 'VISp_lower',
       'VISp_upper', 'dominant_feature', 'dominant_experience_level',
       'next_highest_conditions', 'feature_selectivity',
       'experience_selectivity', 'exp_mod_direction', 'exp_mod_persistence',
       'feature_sel_within_session', 'feature_sel_across_sessions',
       'fraction_cre', 'F_max', 'N1_max', 'N2_max', 'n_cells_cluster'],
      dtype='object')
In [210]:
min_size = 0
max_size = 1000

colors = utils.get_cre_line_colors()

figsize=(4,3)
fig, ax = plt.subplots(figsize=figsize)
sns.scatterplot(data=cluster_stats, x='cre_line', y='feature_selectivity', hue='cre_line',
             palette=colors, hue_order=cell_types, size='fraction_cre', sizes=(min_size, max_size), ax=ax)
ax.legend(fontsize='xx-small', title='', bbox_to_anchor=(1.1,1))
ax.set_ylabel('feature selectivity index')
ax.set_xlabel('')
ax.set_title('feature selectivity')
# ax.set_xticklabels([processing.get_cell_type_for_cre_line(cluster_meta, cre_line) for cre_line in cre_lines], rotation=45)
ax.set_xticklabels([cre_line.split('-')[0] for cre_line in cre_lines[::-1]], rotation=0)

ax.set_ylim(0,1)
Out[210]:
(0, 1)
In [212]:
max_size = 0
max_size = 1000

figsize=(4,3)
fig, ax = plt.subplots(figsize=figsize)
sns.scatterplot(data=cluster_stats, x='cre_line', y='experience_selectivity', hue='cre_line', 
             palette=colors, hue_order=cell_types, size='fraction_cre', sizes=(min_size, max_size), ax=ax)
ax.legend(fontsize='x-small', title='', bbox_to_anchor=(1.1,1))
ax.set_ylabel('experience selectivity')
ax.set_xlabel('')
ax.set_title('experience selectivity')
# ax.set_xticklabels([processing.get_cell_type_for_cre_line(cluster_meta, cre_line) for cre_line in cre_lines], rotation=45)
ax.set_xticklabels([cre_line.split('-')[0] for cre_line in cre_lines[::-1]], rotation=0)

ax.set_ylim(0,1)
Out[212]:
(0, 1)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

details stuff / validation

count number of cells in different areas & depths

In [ ]:
make_frequency_table(cluster_meta, groupby_columns = ['cre_line', 'targeted_structure'], normalize=False)

There are way more cells in VISp than VISl

In [ ]:
make_frequency_table(cluster_meta, groupby_columns = ['cre_line', 'layer'], normalize=False)

There are way more cells in lower layers for Sst and upper layers for Vip

In [ ]:
make_frequency_table(cluster_meta, groupby_columns = ['cre_line', 'binned_depth'], normalize=False)

Numbers get pretty small for inhibitory lines when looking at depths in 4 bins

get frequency across areas & layer for one cre line

In [ ]:
cre_line = cre_lines[1]
print(cre_line)
In [ ]:
make_frequency_table(cluster_meta[cluster_meta.cre_line==cre_line], 
                     groupby_columns = ['targeted_structure', 'layer'], normalize=False)
In [ ]:
cre_meta = cluster_meta[cluster_meta.cre_line==cre_line]
make_frequency_table(cre_meta, groupby_columns = ['targeted_structure', 'layer'], normalize=True)
In [ ]:
 

plot frequency by area and depth

In [ ]:
cre_line = cre_lines[1]
cre_meta = cluster_meta[cluster_meta.cre_line==cre_line]
frequency = make_frequency_table(cre_meta, groupby_columns = ['targeted_structure', 'layer'], normalize=True)

Rows add up to 1

In [ ]:
fig, ax = plt.subplots(figsize=(6,2.5))
ax = sns.heatmap(frequency, vmin=0, cmap='magma', ax=ax, cbar_kws={'shrink':0.8, 'label':'fraction of cells'})
ax.set_ylim((0, 4))
# ax.set_yticklabels(frequency.index, rotation=0, horizontalalignment='center')
ax.set_xlim(-0.5, len(frequency.columns)+0.5)
ax.set_ylabel('')
ax.set_title(cell_types[cre_line])
fig.tight_layout()

normalizing to cluster size doesnt make sense

In [ ]:
stats_df = cre_meta[['cluster_id', 'binned_depth']]
frequency_table= stats_df.groupby('cluster_id')['binned_depth'].value_counts(normalize=False).unstack()
frequency_table= frequency_table.fillna(0)
frequency_table
In [ ]:
stats_df = cre_meta[['cluster_id', 'binned_depth']]
frequency_table= stats_df.groupby('cluster_id')['binned_depth'].value_counts(normalize=True).unstack()
frequency_table= frequency_table.fillna(0)
frequency_table
In [ ]:
sns.heatmap(frequency_table)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

plots with individual cells per cluster

In [ ]:
# get dropouts for some specific condition and add to cre meta for plotting
condition = ('all-images', 'Familiar')
metric_data = df[condition]
metric_data = metric_data.rename(columns={('all-images', 'Novel 1'):'metric'})
metric_data = pd.DataFrame(metric_data, columns=['metric'])
metric_meta = cre_meta.merge(metric_data, on='cell_specimen_id')
In [ ]:
fig, ax = plt.subplots()
ax = sns.scatterplot(data=metric_meta, y='imaging_depth', x='metric', hue='cluster_id', palette='magma_r', ax=ax)
ax.legend(bbox_to_anchor=(1,1), )
In [ ]:
fig, ax = plt.subplots()
ax = sns.pointplot(data=metric_meta, y='binned_depth', x='metric', hue='cluster_id', 
                   orient='h', join=False, dodge=0.5, palette='magma_r', ax=ax)
ax.legend(bbox_to_anchor=(1,1), )
In [ ]:
# get dropouts for some specific condition and add to cre meta for plotting
condition = ('behavioral', 'Novel 1')
metric_data = df[condition]
metric_data = metric_data.rename(columns={('all-images', 'Novel 1'):'metric'})
metric_data = pd.DataFrame(metric_data, columns=['metric'])
metric_meta = cre_meta.merge(metric_data, on='cell_specimen_id')
In [ ]:
fig, ax = plt.subplots()
ax = sns.scatterplot(data=metric_meta, y='imaging_depth', x='metric', hue='cluster_id', palette='magma_r', ax=ax)
ax.legend(bbox_to_anchor=(1,1), )
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
area_df = pd.DataFrame(frequency_table_area.unstack(), columns=['fraction']).reset_index()
area_df = area_df.groupby(['cluster_id', 'targeted_structure']).mean().unstack()
area_df.columns = area_df.columns.droplevel()
fig, ax = plt.subplots(figsize=(6,2))
ax = sns.heatmap(area_df.T, vmin=0, cmap='magma', ax=ax, cbar_kws={'shrink':0.8, 'label':'fraction cells\nper area'})
ax.set_ylim((0, 2))
ax.set_yticklabels(area_df.T.index, rotation=0, horizontalalignment='center')
ax.set_ylabel('')
ax.set_xlim(-0.5, len(area_df)+0.5)
fig.tight_layout()
In [ ]:
depth_df = pd.DataFrame(frequency_table_depth.unstack(), columns=['fraction']).reset_index()
depth_df = depth_df.groupby(['cluster_id', 'binned_depth']).mean().unstack()
depth_df.columns = depth_df.columns.droplevel()

fig, ax = plt.subplots(figsize=(6,2.5))
ax = sns.heatmap(depth_df.T, vmin=0, cmap='magma', ax=ax, cbar_kws={'shrink':0.8, 'label':'fraction cells\nper depth'})
ax.set_ylim((0, 4))
ax.set_yticklabels(depth_df.T.index, rotation=0, horizontalalignment='center')
ax.set_xlim(-0.5, len(depth_df)+0.5)
ax.set_ylabel('')
fig.tight_layout()
In [ ]:
fig, ax = plt.subplots
sns.barplot(data=area_df, x='cluster_id', y='fraction', hue='targeted_structure')
In [ ]:
frequency_table_depth
In [ ]: